Imports


In [1]:
import vm_tools as vmt
import cortex as cx
import matplotlib.pyplot as plt
import numpy as np
import nibabel as nib
import copy
import time
import os
import h5py


WARNING:docdb.actions.fmri.ImportDicom:Unable to import dicom module.

In [2]:
%matplotlib inline

In [10]:
def load_data(filename):
    """ Return the fMRI data for the given filename and print the shape
        of the data array.

    Parameters
    ----------
    filename : string
        Path to the given file. The file must be a .nii file.

    Returns
    -------
    data : numpy array
        An array containing the data, with the first 4 volumes dropped.
    """
    img = nib.load(filename)
    data = img.get_data()
    data = data[:,:,:,4:]  # drop the first 4 volumes
    print(data.shape)
    return data

def combine_run_arrays(run_array_lst):
    """ Return a combined 4D array by concatenating along time (axis=3).

    Parameters
    ----------
    run_array_lst : list of 4D numpy arrays

    Returns
    -------
    4D numpy array
    """
    return np.concatenate(run_array_lst, axis=3)
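
A quick toy example (not the real data) of what combine_run_arrays does to the run shapes:

In [ ]:
# Two toy "runs" with the same spatial grid but different numbers of volumes
run_a = np.zeros((2, 2, 2, 10))   # (x, y, z, time)
run_b = np.zeros((2, 2, 2, 15))
print(combine_run_arrays([run_a, run_b]).shape)   # (2, 2, 2, 25): runs stacked along time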

Get the feature space, computed for the natural movie data set

Load Gabor wavelet data from files. More advanced storage of different feature spaces and models is possible (and recommended); this is just to show the basic regression procedure.
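
One simple way to do that kind of storage is an HDF5 file per feature space. A minimal sketch with h5py, using hypothetical file and dataset names (placeholder arrays stand in for the real Gabor features):

In [ ]:
# Hypothetical file and dataset names -- adapt to your own layout
fs_path = 'gabor_feature_space.hf5'

# Save the estimation / validation feature spaces once...
with h5py.File(fs_path, 'w') as hf:
    hf.create_dataset('est', data=np.zeros((1200, 747)))   # placeholder array
    hf.create_dataset('val', data=np.zeros((90, 747)))     # placeholder array

# ...and load them back in any later analysis
with h5py.File(fs_path, 'r') as hf:
    stim_fs_est = hf['est'][:]
    stim_fs_val = hf['val'][:]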


In [8]:
# All files containing BOLD data for subject 4
files = ['task001_run001.bold_dico.nii', 'task001_run002.bold_dico.nii', 
         'task001_run003.bold_dico.nii', 'task001_run004.bold_dico.nii', 
         'task001_run005.bold_dico.nii', 'task001_run006.bold_dico.nii',
         'task001_run007.bold_dico.nii', 'task001_run008.bold_dico.nii']

all_data = []
for index, filename in enumerate(files):
    new_data = load_data('../../data/' + filename) #load_data function drops first 4 for us
    num_vols = new_data.shape[-1]
    if index != 0 and index != 7:
        new_num_vols = num_vols - 4   
        new_data = new_data[:,:,:,:new_num_vols] #Drop last 4 volumes for middle runs    
    print(new_data.shape[-1])
    all_data.append(new_data)

combined_runs = combine_run_arrays(all_data) 
combined_runs = combined_runs[:,:,:,9:] # First ~17 s are opening credits with no scene labels, so drop the first 9 volumes


(160, 160, 36, 447)
447
(160, 160, 36, 437)
433
(160, 160, 36, 434)
430
(160, 160, 36, 484)
480
(160, 160, 36, 458)
454
(160, 160, 36, 435)
431
(160, 160, 36, 538)
534
(160, 160, 36, 259)
259
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-8-95bad4489d0d> in <module>()
     16     all_data.append(new_data)
     17 
---> 18 combined_runs = dl.combine_run_arrays(all_data)
     19 combined_runs = dl.combined_runs[:,:,:,9:] #First 17 seconds are credits/no scene id so drop

AttributeError: 'module' object has no attribute 'combine_run_arrays'

In [8]:
# Get stimulus feature spaces (Gabor wavelet features)
stim_fs_fpath = '/home/yuanwang/projects/OpenfMRI/data'
# NOTE: the file name and array keys below are placeholders -- point them at
# the actual saved feature-space arrays
stim_fs_file = np.load(os.path.join(stim_fs_fpath, 'gabor_feature_space.npz'))  # hypothetical file name

stim_fs_est = stim_fs_file['est']  # estimation (training) feature space
stim_fs_val = stim_fs_file['val']  # validation feature space

# stim_fs_est_s1 = stim_fs_est_allsess[0:len(stim_fs_est_allsess)/3,:]
# stim_fs_val_s1 = stim_fs_val_allsess[0:len(stim_fs_val_allsess)/3,:]

# stim_fs_est = np.zeros((np.shape(stim_fs_est_s1)[0]/2,np.shape(stim_fs_est_s1)[1]))
# stim_fs_val = np.zeros((np.shape(stim_fs_val_s1)[0]/2,np.shape(stim_fs_val_s1)[1]))

# for t in range(np.shape(stim_fs_est)[0]):
#     stim_fs_est[t,:] = np.true_divide(stim_fs_est_s1[2*t,:] + stim_fs_est_s1[2*t+1,:],2)

# for t in range(np.shape(stim_fs_val)[0]):
#     stim_fs_val[t,:] = np.true_divide(stim_fs_val_s1[2*t,:] + stim_fs_val_s1[2*t+1,:],2)

In [9]:
# Estimation feature space has 1200 time points, 747 features
print('Estimation feature space shape: %s'%repr(stim_fs_est.shape))
# Validation feature space has 90 time points, 747 features
print('Validation feature space shape: %s'%repr(stim_fs_val.shape))


Estimation feature space shape: (1200, 747)
Validation feature space shape: (90, 747)

Get fMRI data for one subject


In [10]:
# Get estimation and validation fMRI data for this subject (time axis first,
# matching the shapes reported below)
with h5py.File('/data/movie_data/SubML_movies_session1_est.hf5','r') as hf:
    data_est = hf['data'].value
with h5py.File('/data/movie_data/SubML_movies_session1_val.hf5','r') as hf:
    data_val = hf['data'].value

# Alternative: load a single NIfTI file directly with nibabel
# img = nib.load('ds114_sub004_t2r1.nii')
# data = img.get_data()

In [11]:
data_est.shape


Out[11]:
(1200, 30, 100, 100)

Mask fMRI data


In [12]:
# Get the cortical mask for this subject; indexing with it flattens the 3D
# volume into a 1D voxel axis
mask = cx.db.get_mask('MLfs2','MLfs2_nb',type='cortical')

In [13]:
# Mask data
data_est_masked = data_est[:,mask]
data_val_masked = data_val[:,mask]
# Show size of masked data
print('Size of masked estimation data is %s'%repr(data_est_masked.shape))
print('Size of masked validation data is %s'%repr(data_val_masked.shape))


Size of masked estimation data is (1200, 58108)
Size of masked validation data is (90, 58108)
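
To make the indexing explicit: mask is a 3D boolean array over voxels, and indexing the (time, z, y, x) data with it collapses the three spatial axes into a single voxel axis. A minimal sketch with toy shapes (not the real data):

In [ ]:
rng = np.random.RandomState(0)
toy_data = rng.randn(5, 4, 6, 6)       # (time, z, y, x)
toy_mask = rng.rand(4, 6, 6) > 0.5     # 3D boolean mask over voxels

masked = toy_data[:, toy_mask]         # -> (time, n_voxels), n_voxels == toy_mask.sum()
print(masked.shape, toy_mask.sum())

# The reverse operation (used later to build `br`): scatter per-voxel values
# back into a full 3D volume
vol = np.zeros(toy_mask.shape)
vol[toy_mask] = masked[0]              # e.g. the first time point's values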


Set up feature space matrix to do regression


In [15]:
# Create lagged stimulus matrices (copies of the features delayed by 2, 3, and 4
# TRs, to account for the hemodynamic delay)
efs = vmt.utils.add_lags(stim_fs_est,[2,3,4])
vfs = vmt.utils.add_lags(stim_fs_val,[2,3,4])
# Add a column of ones (constant / intercept term)
efs = vmt.utils.add_constant(efs,is_first=True)
vfs = vmt.utils.add_constant(vfs,is_first=True)
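
For intuition, a minimal sketch of the lagging idea (an illustration only; vm_tools' add_lags may handle edges differently): each delayed copy of the feature matrix is shifted down by its lag and the copies are stacked side by side, so the regression can fit a separate weight per delay.

In [ ]:
def add_lags_sketch(X, lags):
    """Stack time-shifted copies of X (n_timepoints x n_features) side by side."""
    n_t, n_f = X.shape
    lagged = np.zeros((n_t, n_f * len(lags)))
    for i, lag in enumerate(lags):
        # the feature value at time t-lag is used to predict the response at time t;
        # the first `lag` rows stay zero
        lagged[lag:, i * n_f:(i + 1) * n_f] = X[:n_t - lag, :]
    return lagged

toy_fs = np.arange(12).reshape(6, 2).astype(float)   # 6 time points, 2 features
print(add_lags_sketch(toy_fs, [2, 3, 4]).shape)      # (6, 6): one shifted copy per lag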

Run regression


In [16]:
alpha = np.logspace(0,6,10)
alpha


Out[16]:
array([  1.00000000e+00,   4.64158883e+00,   2.15443469e+01,
         1.00000000e+02,   4.64158883e+02,   2.15443469e+03,
         1.00000000e+04,   4.64158883e+04,   2.15443469e+05,
         1.00000000e+06])

In [17]:
# Run regression 
n_splits = 10 # number of subdivisions of validation data for cross validation of ridge parameter (alpha)
n_resamps = 10 # number of times to compute regression & prediction within training data (can be <= n_splits)
chunk_sz = 10000 # number of voxels to fit at once. Memory-saving.
pthr = 0.005 # Ridge parameter is chosen based on how many voxels are predicted above a correlation threshold 
             # for each alpha value (technically it's slightly more complicated than that, see the code). 
             # This p value sets that correlation threshold.
t0 = time.time()

out = vmt.regression.ridge_cv(efs,data_est_masked,val_fs=vfs,val_data=data_val_masked,alphas=alpha,n_resamps=n_resamps,
                              n_splits=n_splits,chunk_sz=chunk_sz,pthr=pthr,is_verbose=True)

t1 = time.time()
print("Elapsed time is: %d min, %d sec"%((t1-t0)/60,(t1-t0)%60))


Running split 1/10
Running chunk 1 of 6...
/usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.py:598: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.py:1057: RuntimeWarning: Degrees of freedom <= 0 for slice.
  warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning)
/home/yuanwang/code/vm_tools/Stats/utils.py:24: RuntimeWarning: invalid value encountered in divide
  r = rTmp/n
Running chunk 2 of 6...

Running chunk 3 of 6...

Running chunk 4 of 6...

Running chunk 5 of 6...

Running chunk 6 of 6...

[... identical per-chunk progress output for splits 2/10 through 10/10 ...]

Finding best alpha...
/home/yuanwang/code/vm_tools/regression.py:409: RuntimeWarning: invalid value encountered in greater
  n_sig_vox_byalpha = sum(trncc_byvox>sig_thresh)-sum(trncc_byvox<-sig_thresh)
/home/yuanwang/code/vm_tools/regression.py:409: RuntimeWarning: invalid value encountered in less
  n_sig_vox_byalpha = sum(trncc_byvox>sig_thresh)-sum(trncc_byvox<-sig_thresh)
Best alpha = 46415.888
Computing weights and model predictions...
Running chunk 1 of 6...

Running chunk 2 of 6...

Running chunk 3 of 6...

Running chunk 4 of 6...

Running chunk 5 of 6...

Running chunk 6 of 6...

Elapsed time is: 103 min, 28 sec
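
For reference, the model fit for each alpha above is ordinary ridge regression; a minimal sketch of the closed-form solution on toy data (not vm_tools' chunked, cross-validated implementation):

In [ ]:
def ridge_fit_sketch(X, Y, alpha):
    """Closed-form ridge weights: (X'X + alpha*I)^-1 X'Y.

    X : (n_timepoints, n_features) lagged feature matrix
    Y : (n_timepoints, n_voxels) masked BOLD data
    """
    n_f = X.shape[1]
    return np.linalg.solve(np.dot(X.T, X) + alpha * np.eye(n_f), np.dot(X.T, Y))

rng = np.random.RandomState(0)
X_toy = rng.randn(120, 20)
Y_toy = rng.randn(120, 5)
wts = ridge_fit_sketch(X_toy, Y_toy, alpha=100.0)
pred = np.dot(X_toy, wts)               # predicted responses; correlate with held-out data
print(wts.shape, pred.shape)            # (20, 5) (120, 5)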

Make sure the estimation procedure chose a reasonable $\alpha$ value

There should be a reasonably clear maximum in the curve plotted below.


In [18]:
# Plot number of voxels with significant prediction accuracy within the 
# estimation data for each alpha value
na = len(out['n_sig_vox_byalpha'])
plt.plot(range(na),out['n_sig_vox_byalpha'],'ko-',lw=2)
# plt.xticks(range(na),vmt.regression.DEFAULT_ALPHAS,rotation=45)
plt.xticks(range(na),alpha,rotation=45)
plt.xlabel('Regularization parameter')
_ = plt.ylabel('Number of voxels\naccurately predicted')


Display prediction accuracy results on the cortical surface


In [20]:
out['cc'].shape


Out[20]:
(58108,)

In [21]:
plt.hist(np.nan_to_num(out['cc']),21)


Out[21]:
(array([  7.00000000e+00,   5.80000000e+01,   2.28000000e+02,
          5.64000000e+02,   1.16400000e+03,   2.37900000e+03,
          4.19000000e+03,   6.35200000e+03,   8.33500000e+03,
          9.05500000e+03,   8.33900000e+03,   6.67400000e+03,
          4.51500000e+03,   2.74600000e+03,   1.46600000e+03,
          8.79000000e+02,   5.13000000e+02,   3.26000000e+02,
          1.95000000e+02,   9.60000000e+01,   2.70000000e+01]),
 array([-0.55591387, -0.49308745, -0.43026104, -0.36743462, -0.30460821,
        -0.24178179, -0.17895538, -0.11612896, -0.05330255,  0.00952387,
         0.07235028,  0.1351767 ,  0.19800311,  0.26082953,  0.32365594,
         0.38648236,  0.44930877,  0.51213519,  0.5749616 ,  0.63778802,
         0.70061443,  0.76344085]),
 <a list of 21 Patch objects>)

In [22]:
mask.sum()


Out[22]:
58108

In [23]:
vmt.plot_utils.slice_3d_matrix(mask,0);



In [24]:
# Put the masked prediction accuracies back into a full 3D volume (used for a pycortex Volume below)
br = np.zeros(mask.shape)
br[mask] = out['cc']

In [34]:
# Save the regression output (prediction-accuracy volume and weights) to disk
aff_output_fpath = '/home/yuanwang/projects/affordance/output/af_out_ml_all_w2v3.hdf5'

with h5py.File(aff_output_fpath,'w') as f:
    brDset = f.create_dataset('br', br.shape, br.dtype)
    brDset[:] = br

    weightsDset = f.create_dataset('weights',out['weights'].shape, out['weights'].dtype)
    weightsDset[:] = out['weights']

In [4]:
# Reload the saved regression output (e.g., if the browser or kernel crashed)
with h5py.File('/home/yuanwang/projects/affordance/output/af_out_ml_all.hdf5','r') as hf:
    br = hf['br'].value
    weights = hf['weights'].value.T

In [27]:
br.shape


Out[27]:
(30, 100, 100)

In [28]:
br.min()


Out[28]:
nan

In [29]:
np.nanmin(br)


Out[29]:
-0.55591386556625366

In [30]:
np.nanmax(br)


Out[30]:
0.76344084739685059

In [5]:
V = cx.Volume(br,'MLfs2','MLfs2_nb',cmap='hot',vmin=0,vmax=0.6)
# cx.webgl.show(V)

In [7]:
cx.quickflat.make_figure(V,with_curvature=True)


Out[7]:

In [8]:
cx.quickflat.make_png('/home/yuanwang/projects/affordance/figure/aff_all_ML.png',V,dpi=300)

Getting output by ROI


In [52]:
# # import the saved output
# with h5py.File('/home/yuanwang/projects/affordance/output/af_out_ml.hdf5','r') as hf:
#     aff_br = hf['br'].value
#     aff_weights = hf['weights'].value.T

In [9]:
roimasks,roinames = cx.get_roi_masks('MLfs2','MLfs2_nb',Dst='cortical')


Cannot find shapely, using simple label placement
24 voxels cut
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:370: RuntimeWarning: invalid value encountered in double_scalars
  theta = arccos(R/((Q**3)**.5))
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:372: RuntimeWarning: invalid value encountered in double_scalars
  x1 = real(-1.0*(2.0*(Q**.5)*cos(theta/3.0)) - a/3.0)
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:373: RuntimeWarning: invalid value encountered in double_scalars
  x2 = real(-1.0*(2.0*(Q**.5)*cos((theta + 2.0*pi)/3.0)) - a/3.0)
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:374: RuntimeWarning: invalid value encountered in double_scalars
  x3 = real(-1.0*(2.0*(Q**.5)*cos((theta - 2.0*pi)/3.0)) - a/3.0)
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:370: RuntimeWarning: invalid value encountered in arccos
  theta = arccos(R/((Q**3)**.5))
/data/shared_code/anaconda/lib/python2.7/site-packages/cortex/svgsplines.py:377: RuntimeWarning: invalid value encountered in sqrt
  S_p = -1.0*sign(R)*(abs(R) + M**.5)**(1.0/3)

In [19]:
roinames


Out[19]:
{'AC': 21,
 'ATFP': 9,
 'Broca': 4,
 'EBA': 29,
 'FEF': 1,
 'FFA': 35,
 'FO': 10,
 'IFSFP': 26,
 'IPS': 32,
 'LO': 13,
 'M1F': 2,
 'M1H': 3,
 'M1M': 6,
 'OFA': 22,
 'OPA': 33,
 'PMvh': 14,
 'PPA': 11,
 'RSC': 30,
 'S1F': 28,
 'S1H': 25,
 'S1M': 27,
 'SEF': 12,
 'SMFA': 5,
 'SMHA': 34,
 'V1': 15,
 'V2': 16,
 'V3': 17,
 'V3A': 7,
 'V3B': 8,
 'V4': 18,
 'V7': 19,
 'VO': 24,
 'hMT': 23,
 'pSTS': 20,
 'sPMv': 31}

In [20]:
roi_list = ['EBA','FFA', 'OFA', 'V1','V2','V3','V4','LO']

In [25]:
aff_weights.shape


Out[25]:
(58108, 1666)

In [94]:
mkd = np.zeros(roimasks.shape)
# for roi in roi_list:
m_eba = roimasks==roinames[roi_list[0]]
mkd[m_eba] = out['cc']


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-94-2b437e12854f> in <module>()
      2 # for roi in roi_list:
      3 m_eba = roimasks==roinames[roi_list[0]]
----> 4 mkd[m_eba] = out['cc']

ValueError: NumPy boolean array indexing assignment cannot assign 49084 input values to the 186 output values where the mask is true
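
The assignment fails because out['cc'] holds one value per cortical voxel (everything selected by mask), while m_eba selects only the 186 EBA voxels. A hedged sketch of the intended fill, assuming mask is the cortical mask used to produce out['cc']: scatter the correlations into a full volume first, then index that volume with the ROI mask.

In [ ]:
cc_vol = np.zeros(mask.shape)          # full 3D volume
cc_vol[mask] = out['cc']               # scatter per-voxel correlations into the volume
m_eba = roimasks == roinames['EBA']    # boolean mask for one ROI
mkd = np.zeros(roimasks.shape)
mkd[m_eba] = cc_vol[m_eba]             # copy only the ROI voxels' values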

In [87]:
m_eba.shape


Out[87]:
(32, 100, 100)

In [89]:
out['cc'].shape


Out[89]:
(49084,)

In [90]:
mask.shape


Out[90]:
(32, 100, 100)

In [92]:
mkd.shape


Out[92]:
(32, 100, 100)

In [93]:
mkd = np.zeros(roimasks.shape)

In [38]:
np.shape(aff_weights)


Out[38]:
(58108, 1666)

In [36]:
mask


Out[36]:
array([[[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       ..., 
       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ..., 
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]]], dtype=bool)

In [22]:
rois = vmt.ROISet.from_pycortex('AH','NaturalMovies','AHfs','AHfs_nb')
rois


17 voxels cut
Out[22]:
<vmt.ROISet>
    sub_id : 'AH'
   surf_id : 'AHfs'
       exp : 'NaturalMovies'
       xfm : 'AHfs_nb'
 roi_names : ['FEF', 'M1F', 'M1H', 'Broca', 'SMFA', 'M1M', 'V3A', 'V3B', 'ATFP', 'FO', 'PPA', 'SEF', 'LO', 'PMvh', 'V1', 'V2', 'V3', 'V4', 'V7', 'pSTS', 'AC', 'OFA', 'hMT', 'VO', 'S1H', 'IFSFP', 'S1M', 'S1F', 'EBA', 'RSC', 'sPMv', 'IPS', 'OPA', 'SMHA', 'FFA']
      path : None
       _id : None
data_fields : []

In [63]:
rois['V1'].shape


Out[63]:
(32, 100, 100)

In [86]:
mg = np.zeros(v1_mask.shape)
mg[v1_mask] = n2n(rois['V1'].cc)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-86-997bed7edc29> in <module>()
      1 mg = np.zeros(v1_mask.shape)
----> 2 mg[v1_mask] = n2n(rois['V1'].cc)

ValueError: shape mismatch: value array of shape (598,) could not be broadcast to indexing result of shape (32,100,100,100,100)
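
The shape mismatch arises because v1_mask is an integer label array, so NumPy treats it as fancy (integer) indexing rather than a boolean mask, which is where the 5-D indexing result comes from. A hedged sketch of the fix, assuming v1_mask is nonzero only at V1 voxels for the same subject and transform as rois:

In [ ]:
v1_bool = v1_mask.astype(bool)              # boolean V1 mask (or: roimasks == roinames['V1'])
mg = np.zeros(v1_bool.shape)
mg[v1_bool] = np.nan_to_num(rois['V1'].cc)  # one value per V1 voxel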

Display prediction accuracy by ROI

This makes use of the ROISet class within vm_tools, which is useful for splitting data into ROIs


In [67]:
rois = vmt.ROISet.from_pycortex('AH','NaturalMovies','AHfs','AHfs_nb')


17 voxels cut

In [68]:
# Apply roi mask to estimated correlation coefficients and weights
rois.apply_mask(pre_mask=mask,cc=out['cc'],weights=out['weights'])

In [69]:
# Show prediction accuracy for V1 voxels and for FFA voxels
n2n = np.nan_to_num
rois_to_plot = ['V1','FFA','S1H','M1H']
fig = plt.figure(figsize=(15,15))
for iroi,roi in enumerate(rois_to_plot):
    plt.subplot(len(rois_to_plot),1,iroi+1)
    plt.hist(n2n(rois[roi].cc),30)
    plt.xlabel('Prediction accuracy ($r$)')
    plt.ylabel('Count')
    _ = plt.title(roi)


Weight visualization


In [23]:
gab,pp = vmt.viz.load_gabor(stim_fs_val_file)

In [24]:
h = plt.figure(figsize=(6,6))
ax = plt.subplot(111)
# Prep weights for display: pick the best-predicted voxel and show its averaged,
# rescaled Gabor weights (assumed intent of this cell)
vox = np.argmax(np.nan_to_num(out['cc']))            # index of the best-predicted voxel
to_plot = vmt.utils.avg_wts(out['weights'].T[vox],
                            Mode='wholematrixrespectzero')/2.0 + 0.5
vmt.viz.show_gabor(to_plot,pp,bgcol=(0.5,0.5,0.5),ax=ax)
plt.title('$r$ = %0.2f'%out['cc'][vox])

In [49]:
DEFAULT_ALPHAS=np.array([0]+[2**x for x in range(10,21)])

In [50]:
DEFAULT_ALPHAS


Out[50]:
array([      0,    1024,    2048,    4096,    8192,   16384,   32768,
         65536,  131072,  262144,  524288, 1048576])

In [ ]: